
/* 

Paper: Gentrification and pioneer businesses 
Authors: Behrens, Boulam, Martin, Mayneris 

Name dofile: create_db_concordedblocks_philly.do  
Version: 16 nov. 2021 

Output: id_conc_database_philly.dta 

Inputs:

- block_group_phil.dta (from ???)
- block_phil.dta (from ???)
- concord_philly9010.dta (authors' computation, publicly available)
- idconc_lat_lon_philly.dta (authors' computation, publicly available)
*/ 

// I. Merge var block_group avec variables block 

* deal with duplicated observations
use block_group_phil, clear 
duplicates tag statea countya tracta blkgrpa year, g(tag) 

foreach i in pop_block_group edu_college_block{
egen `i'_tot=sum(`i'), by(statea countya tracta blkgrpa year)
}

foreach i in median_income_block percap_income_block median_yearb_block median_crent_block median_grent_block median_value_block{
replace `i'=`i'*pop_block_group/pop_block_group_tot 
egen `i'_tot=sum(`i') , by(statea countya tracta blkgrpa year)
}

foreach i in pop_block_group edu_college_block median_income_block percap_income_block median_yearb_block median_crent_block median_grent_block median_value_block{
replace `i'=`i'_tot 
drop `i'_tot 
}
duplicates drop statea countya tracta blkgrpa year, force
keep statea countya tracta blkgrpa year pop_block_group edu_college_block median_income_block percap_income_block median_yearb_block median_crent_block median_grent_block median_value_block
save temp, replace 

* merge block and block groups 
use block_phil, clear 
foreach i in pop pop_white_block pop_black_block pop_asian_block pop_other_block male female households housing occupied vacant renter_occupied owner_occupied hispanic {
egen `i'_=sum(`i'), by(statea countya tracta blocka year)
replace `i'=`i'_
drop `i'_
} // not a collapse to conserve  the label 
duplicates drop statea countya tracta blocka year, force 
sort statea countya tracta blkgrpa year 
merge m:1 statea countya tracta blkgrpa year using temp
keep if _m==3
drop _m 
erase temp.dta 
  
// II. Build block level variables 

replace edu_college_block=edu_college_block*pop/pop_block_group

// III. Merge with stable geography (id_conc) 

foreach i in statea countya tracta blocka{
tostring `i', replace
}

replace statea = "0"+ statea if length(statea)==1 
replace countya = "0"+ countya if length(countya)==2 
replace countya = "00"+ countya if length(countya)==1 
replace tracta = "0"+ tracta if length(tracta)==5  
replace tracta = "00"+ tracta if length(tracta)==4    
replace tracta = "000"+ tracta if length(tracta)==3  
replace tracta = "0000"+ tracta if length(tracta)==2   
replace tracta = "00000"+ tracta if length(tracta)==1   
replace blocka = "0"+ blocka if length(blocka)==3 
replace blocka = "00"+ blocka if length(blocka)==2
replace blocka = "000"+ blocka if length(blocka)==1

g geoid=statea+countya+tracta+blocka

g _=length(gisjoin) 
replace geoid = substr(gisjoin,2,2) + substr(gisjoin,5,3) + substr(gisjoin,9,4) +"00" + "0"+ substr(gisjoin,13,3) if _==15  & year==1990
replace geoid = substr(gisjoin,2,2) + substr(gisjoin,5,3) + substr(gisjoin,9,4) +"00" + substr(gisjoin,13,4) if _==16  & year==1990
replace geoid = substr(gisjoin,2,2) + substr(gisjoin,5,3) + substr(gisjoin,9,6) + "0"+ substr(gisjoin,15,3) if _==17 & year==1990 
replace geoid = substr(gisjoin,2,2) + substr(gisjoin,5,3) + substr(gisjoin,9,6) + substr(gisjoin,15,4) if _==18  & year==1990


sort geoid year
merge 1:1 geoid year using concord_philly9010
drop _m 

// IV. Build variables at the id_conc level 

foreach v of var median_yearb_block median_crent_block median_grent_block median_value_block  {
bys id_conc year: egen housing_tot=sum(housing) if (`v'>0&`v'!=.)
gen sh=housing/housing_tot
replace `v'=sh*`v'
drop sh housing_tot
}

foreach v of var median_income_block percap_income_block male female households hispanic pop_white_block pop_black_block pop_asian_block pop_other_block {
bys id_conc year: egen pop_tot=sum(pop) if `v'!=.
gen sh=pop/pop_tot
replace `v'=sh*`v'
drop sh pop_tot
}

*We identify conc_blocks which have only missing values for each variable, 
*so as to replace the information as missing after the collapse

foreach v of var pop pop_* housing edu_college_block median_yearb_block median_crent_block median_grent_block median_value_block  median_income_block percap_income_block{
gen miss_`v'= `v'!=.
}

collapse (sum) pop pop_* housing edu_college_block median_* percap_income_block miss_* (count) num_geoid=pop, by(id_conc year) 
 
foreach v of var pop pop_* edu_college_block median_yearb_block median_crent_block median_grent_block median_value_block  median_income_block percap_income_block{
replace `v'=. if miss_`v'==0
}

sort id_conc 
merge m:1 id_conc using idconc_lat_lon_philly
drop _m

*We create distance to CBD
gen lat_ws=39.949594
gen lon_ws=-75.150260

gen dis_ws = acos(sin((intplat10*_pi)/180) * sin((lat_ws*_pi)/180) + cos(abs((intplon10*_pi)/180- (lon_ws*_pi)/180)) * cos(intplat10*_pi/180) * cos(lat_ws*_pi/180)) * 6378.137

egen t=group(year)

saveold id_conc_database_philly.dta, replace v(13)

